{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "ChatBot - Day 2.ipynb",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "id": "ZY2tOpvsanoH"
      },
      "source": [
        "import random\n",
        "import nltk\n",
        "import json\n",
        "from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "from sklearn.model_selection import train_test_split"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "W-j5FF8ib0rr"
      },
      "source": [
        "# BOT_CONFIG = {\n",
        "#     'intents': {\n",
        "#         'hello': {\n",
        "#             'examples': ['Привет!', 'Здарова', 'Хей-хей!!'],\n",
        "#             'responses': ['Хай', 'Добрый вечер!', 'Здравствуйте!']\n",
        "#         },\n",
        "#         'bye': {\n",
        "#             'examples': ['Пока', 'Увидимся!', 'Покеда'],\n",
        "#             'responses': ['До свидания', 'Прощайте', 'Сайонара!']\n",
        "#         }\n",
        "#     }\n",
        "# }\n",
        "with open('/content/BOT_CONFIG.json', 'r') as f:\n",
        "  BOT_CONFIG = json.load(f)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "XsTSV9h04pcm"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "AgZvlkLpiwWI"
      },
      "source": [
        "# День 1"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "5NtkA4HjdTWb"
      },
      "source": [
        "def clean(text):\n",
        "  text = text.lower()\n",
        "  cleaned_text = ''\n",
        "  for ch in text:\n",
        "    if ch in 'абвгдеёжзийклмнопрстуфхцчшщъыьэюя ':\n",
        "      cleaned_text = cleaned_text + ch\n",
        "  return cleaned_text\n",
        "\n",
        "def get_intent(text):\n",
        "  for intent in BOT_CONFIG['intents'].keys():\n",
        "    for example in BOT_CONFIG['intents'][intent]['examples']:\n",
        "      w1 = clean(example)\n",
        "      w2 = clean(text)\n",
        "      if nltk.edit_distance(w1, w2) / max(len(w1), len(w2)) < 0.4:\n",
        "        return intent\n",
        "  return 'интент не найден'"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "uIe5ey1ni2FE"
      },
      "source": [
        "# День 2"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "wNl1ljC5i4cF",
        "outputId": "ba49dcbc-edcb-4a6a-c459-3f9cbd25a0cf"
      },
      "source": [
        "X = []\n",
        "y = []\n",
        "\n",
        "for intent in BOT_CONFIG['intents'].keys():\n",
        "    try:\n",
        "        for example in BOT_CONFIG['intents'][intent]['examples']:\n",
        "            X.append(example)\n",
        "            y.append(intent)\n",
        "    except:\n",
        "        pass\n",
        "\n",
        "len(X), len(y), len(set(y))"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(841, 841, 243)"
            ]
          },
          "metadata": {},
          "execution_count": 17
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "mnh4irQurVRv",
        "outputId": "f137ae2f-eb44-4c77-a911-ef2fb0be20fe"
      },
      "source": [
        "X_train, X_test, y_train, y_test = train_test_split(\n",
        "    X, y, test_size=0.2, random_state=42)\n",
        "len(X_train), len(X_test)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(672, 169)"
            ]
          },
          "metadata": {},
          "execution_count": 18
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "OJUrFOGtkdz3",
        "outputId": "46b7fb74-be23-413a-aa3b-d031bbdb77c8"
      },
      "source": [
        "vectorizer = CountVectorizer(preprocessor=clean, analyzer='char', ngram_range=(2,3))\n",
        "X_train_vect = vectorizer.fit_transform(X_train)\n",
        "X_test_vect = vectorizer.transform(X_test)\n",
        "\n",
        "len(vectorizer.get_feature_names())"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "2476"
            ]
          },
          "metadata": {},
          "execution_count": 19
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "7eOGJ7fzlQkX",
        "outputId": "bbad56d4-93e8-406f-b2f4-2238e34e9651"
      },
      "source": [
        "log_reg = LogisticRegression(C=0.2)\n",
        "log_reg.fit(X_train_vect, y_train)\n",
        "log_reg.score(X_train_vect, y_train)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0.8065476190476191"
            ]
          },
          "metadata": {},
          "execution_count": 20
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "ranBVVyprytC",
        "outputId": "59da4197-cc13-461b-8bea-dfd1da6661ba"
      },
      "source": [
        "log_reg.score(X_test_vect, y_test)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0.1834319526627219"
            ]
          },
          "metadata": {},
          "execution_count": 21
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "yEwIipd7l7gk"
      },
      "source": [
        "def get_intent_by_model(text):\n",
        "    return log_reg.predict(vectorizer.transform([text]))[0]"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "WugqtKIamUYE"
      },
      "source": [
        "def bot(question):\n",
        "  intent = get_intent_by_model(question)\n",
        "  return random.choice(BOT_CONFIG['intents'][intent]['responses'])"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "XZf6LMvOt9MK"
      },
      "source": [
        "question = ''\n",
        "while True:\n",
        "  question = input()\n",
        "  if question != 'стоп':\n",
        "      answer = bot(question)\n",
        "      print(answer)\n",
        "  else:\n",
        "      break"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "id": "D4ER3h-euaUs",
        "outputId": "7695ae87-58f8-4eac-aa1a-008ae7f13624"
      },
      "source": [
        "bot(question)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            },
            "text/plain": [
              "'у тебя все наладится!'"
            ]
          },
          "metadata": {},
          "execution_count": 13
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "WaOnb7Igwgx-"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}